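%% Beamer slide deck: Question Answering System in a Business Intelligence Context.
%% The preamble configures the theme, colours, footline and helper macros; the frames follow \begin{document}.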
% Document class and packages.
% The xcolor option list is wrapped in braces so that beamer hands all three
% options (pdftex driver, dvips colour names, table support needed by
% \rowcolors) to xcolor; without the braces the list is split at the commas
% and only `pdftex' is attached to the xcolor= key.
\documentclass[xcolor={pdftex,dvipsnames,table}]{beamer}
\usetheme{Warsaw}
\usepackage{graphicx}  % \includegraphics, \scalebox
\usepackage{listings}  % lstlisting environment (SQL example)
\usepackage{multirow}  % \multirow cells in the tables
\usepackage{multicol}  % multicols environment (outline, SQL listing)
\usepackage{pgfplots}  % axis environments of the evaluation plots
\usepackage{tikz}
\usetikzlibrary{positioning}  % `right=... of ...', `above=... of ...' placement
% Title-page metadata.
% \author: the optional argument is the short form shown by \insertshortauthor
% in the footline; the long form also names the supervisor in smaller type.
\author[Nicolas Kuchmann-Beauger]{Nicolas Kuchmann-Beauger\\ {\tiny \underline{supervised by:}}\\ {\small Marie-Aude Aufaure}}
\title{Question Answering System\\ in a Business Intelligence Context}
% \institute: affiliation line followed by the jury list, typeset as a
% two-column table (name, institution) in \tiny.
\institute{\'Ecole Centrale Paris -- SAP Research\\
\vspace{2mm}
\underline{Jury:}\\
{\tiny
\begin{tabular}{ll}
Matthieu Roche & Universit\'e Montpellier 2\\
Juan Carlos Trujillo Mond\'ejar & Universidad de Alicante\\
Marie-Aude Aufaure & \'Ecole Centrale Paris\\
Patrick Marcel & Universit\'e de Tours\\
Yannick Cras & SAP Research\\
\'Elisabeth M\'etais & CNAM Paris\\
Philippe Meiniel (guest member) & SAP France\\
\end{tabular}
}
}
\date{February 15, 2012}
% Colour palette.
% NOTE: despite its name, UniBlue is an orange tone (RGB 255,127,36); it is
% the accent colour used throughout the deck (structure colour, highlights).
\definecolor{UniBlue}{RGB}{255,127,36}
% SandyBrown is used as the alternate row colour in the state-of-the-art table.
\definecolor{SandyBrown}{RGB}{244,164,96}
%\setbeamercolor{title}{fg=UniBlue}
% Frame titles in white; structural elements in the accent colour.
\setbeamercolor{frametitle}{fg=white}
\setbeamercolor{structure}{fg=UniBlue}
% Empty headline template: no headline is drawn above the frames.
\setbeamertemplate{headline}{}
% Logos, both declared at a height of 0.5cm (files img/ecp and img/sap).
\pgfdeclareimage[height=0.5cm]{ecp-logo}{img/ecp}
\pgfdeclareimage[height=0.5cm]{sap-logo}{img/sap}

% Title graphic: the two logos side by side, separated by a fixed 4.75cm gap.
\titlegraphic{\pgfuseimage{ecp-logo}\hspace*{4.75cm}~%
   \pgfuseimage{sap-logo}
}



% Footline template, registered under the option name `shadow theme'
% (the starred form of \defbeamertemplate also selects it immediately).
% Layout: two boxes, each half the paper width, set side by side in one \hbox.
%   left box  (colours/font `author in head/foot'):
%       <frame number> / <total frames>   ... \hfill ...   <short author>
%   right box (colours/font `title in head/foot'):
%       <short title>
% The frame on which this footline is suppressed is the title frame, which
% locally sets an empty footline template.
\defbeamertemplate*{footline}{shadow theme}
{%
  \leavevmode%
  \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fil,rightskip=.3cm]{author in head/foot}%
    \usebeamerfont{author in head/foot}\insertframenumber\,/\,\inserttotalframenumber\hfill\insertshortauthor
  \end{beamercolorbox}%
  \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
    \usebeamerfont{title in head/foot}\insertshorttitle%
  \end{beamercolorbox}}%
  \vskip0pt%
}

% \circled{<text>}: typesets <text> inside a circle drawn in the accent colour
% (UniBlue), scaled to 80%.  The picture baseline is the baseline of the
% enclosed text, and \smash hides its height and depth so that the
% surrounding line spacing is unaffected.
\newcommand*\circled[1]{%
  \smash{%
    \scalebox{0.8}{%
      \tikz[baseline=(char.base)]{%
        \node[circle,draw,inner sep=2pt,color=UniBlue] (char) {#1};%
      }%
    }%
  }%
}

% \gear{#1}{#2}{#3}{#4}{#5}: expands to a TikZ path fragment describing a
% gear outline; it is meant to follow a path command such as \draw.
%   #1  number of repetitions (teeth); each one is rotated by 360/#1 degrees
%   #2  radius of the first arc and of the start/end points of each repetition
%   #3  radius of the second arc
%   #4  end angle (degrees) of the first arc, which starts at angle 0
%   #5  angular offset (degrees) between the end of the first arc and the
%       start of the second arc, and again before the end point at 360/#1
% The connecting segments to and from the second arc use rounded corners.
% NOTE(review): the only visible use is commented out
% (\gear{11}{3}{3.35}{10}{2}), which suggests #2 is the inner and #3 the
% outer radius -- confirm before relying on that.
\newcommand{\gear}[5]{%
\foreach \i in {1,...,#1} {%
  [rotate=(\i-1)*360/#1]  (0:#2)  arc (0:#4:#2) {[rounded corners=1.5pt]
             -- (#4+#5:#3)  arc (#4+#5:360/#1-#5:#3)} --  (360/#1:#2)
}} 

% \lightrednode{<text>}: inline circular marker with a light red fill
% (red!10) and black outline, holding <text> in \tiny, drawn at 60% scale.
% The picture baseline sits 1pt below the text baseline of the node, and
% \smash keeps the marker from changing the height of the surrounding line.
\newcommand*\lightrednode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-1pt]meas.base)}]{%
        \node[circle,draw=black,fill=red!10,font=\tiny,%
              minimum size=10pt,node distance=2.5cm] (meas) {#1};%
      }%
    }%
  }%
}
% \darkrednode: inline, empty circular marker filled with solid red
% (red!100) and outlined in black, drawn at 60% scale.  A 4pt horizontal
% space follows the circle inside the scaled box.  The picture baseline sits
% 4pt below the node's text baseline; \smash hides the marker's height and
% depth from the surrounding line.
\newcommand*\darkrednode[0]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-4pt]meas1Label.base)}]{%
        \node[circle,draw=black,fill=red!100,font=\tiny,%
              minimum size=10pt] (meas1Label) {};%
      }%
      \hspace{4pt}%
    }%
  }%
}
% \yellownode{<text>}: inline circular marker with a yellow fill (yellow!30)
% and black outline, holding <text> in \small, drawn at 60% scale.
% The picture baseline sits 0.5pt above the node's text baseline, and \smash
% keeps the marker from changing the height of the surrounding line.
% The node used to carry both font=\tiny and font=\small; the later key
% overrides the earlier one, so only the effective font=\small is kept.
\newcommand*\yellownode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=0.5pt]yellowNode.base)}]{%
        \node[circle,minimum size=10pt,draw=black,%
              fill=yellow!30,font=\small] (yellowNode) {#1};%
      }%
    }%
  }%
}
% \lightyellownode{<text>}: inline circular marker with a light yellow fill
% (yellow!10) and black outline, holding <text> in \tiny, drawn at 60% scale.
% The picture baseline sits 1pt below the node's text baseline; \smash hides
% the marker's height and depth from the surrounding line.
\newcommand*\lightyellownode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-1pt]proj1.base)}]{%
        \node[circle,draw=black,fill=yellow!10,font=\tiny,%
              minimum size=10pt] (proj1) {#1};%
      }%
    }%
  }%
}

% \lightgreennode{<text>}: inline circular marker with a light green fill
% (green!10) and black outline, holding <text> in \tiny, drawn at 60% scale.
% The picture baseline sits 1pt below the node's text baseline; \smash hides
% the marker's height and depth from the surrounding line.
\newcommand*\lightgreennode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-1pt]proj1.base)}]{%
        \node[circle,draw=black,fill=green!10,font=\tiny,%
              minimum size=10pt] (proj1) {#1};%
      }%
    }%
  }%
}
% \lightbluenode{<text>}: inline circular marker with a light blue fill
% (blue!10) and black outline, holding <text> in \tiny, drawn at 60% scale.
% The picture baseline sits 1pt below the node's text baseline; \smash hides
% the marker's height and depth from the surrounding line.
\newcommand*\lightbluenode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-1pt]proj1.base)}]{%
        \node[circle,draw=black,fill=blue!10,font=\tiny,%
              minimum size=10pt] (proj1) {#1};%
      }%
    }%
  }%
}


% Counter holding the frame number reached when the backup slides start.
% It is declared once here, outside \backupbegin, so that calling
% \backupbegin more than once no longer fails with an "already defined" error.
\newcounter{framenumberappendix}
% \backupbegin: call right before the first backup frame.  Stores the current
% value of beamer's framenumber counter in framenumberappendix.
% Line ends are commented out so the macro inserts no stray space tokens.
\newcommand{\backupbegin}{%
   \setcounter{framenumberappendix}{\value{framenumber}}%
}
% \backupend: call after the last backup frame.  Resets beamer's framenumber
% counter to the value saved in framenumberappendix by \backupbegin
% (presumably so that the total shown in the footline excludes the backup
% frames).
% The former two-step arithmetic, framenumber + (saved - framenumber),
% produced the same value but overwrote framenumberappendix on the way, so a
% second call gave a wrong result; the direct assignment is idempotent.
% Line ends are commented out so the macro inserts no stray space tokens.
\newcommand{\backupend}{%
   \setcounter{framenumber}{\value{framenumberappendix}}%
}



\begin{document}
% \annotationnode{<text>}: inline rectangular marker with rounded corners and
% a grey outline (black!50), at least 0.8cm high, holding <text>, drawn at
% 60% scale.  The picture baseline sits 1pt below the node's text baseline;
% \smash hides the marker's height and depth from the surrounding line.
\newcommand*\annotationnode[1]{%
  \smash{%
    \scalebox{0.6}{%
      \tikz[baseline={([yshift=-1pt]proj1.base)}]{%
        \node[rectangle,rounded corners,draw=black!50,%
              minimum height=0.8cm] (proj1) {#1};%
      }%
    }%
  }%
}


{
\setbeamertemplate{footline}{} 
\begin{frame}[plain]
\beamertemplatenavigationsymbolsempty
  \titlepage
\end{frame}
}
\addtocounter{framenumber}{-1}

 \frame{
\begin{multicols}{2}
\tableofcontents
\end{multicols}
}

\section{Introduction}
\subsection{Motivation}

\frame{
\frametitle{Motivation}
Growth of the amount of data stored by companies:
\begin{table}
\begin{tabular}{crr}\hline
\multirow{2}{*}{Year} & \multicolumn{1}{c}{companies storing} & \multicolumn{1}{c}{companies storing}\\
 & \multicolumn{1}{c}{more than 1TB} & \multicolumn{1}{c}{more than 100TB}\\\hline\hline
2009 & 74\% & 24\%\\\hline
2010 & 87\% & 29\%\\\hline
\end{tabular}
\caption{Data growth trends~\cite{stadtmueller2011}}
\end{table} 
This leads to \underline{information overload}.\\
\vspace{5mm}
Databases have been introduced to organize data in a structured manner.
}
\subsection{Multidimensional model}



\frame{
\frametitle{Multidimensional model}
Multidimensional models ``facilitate complex analyses and visualization''~\cite{Chaudhuri:1997:ODW:248603.248616}.

\begin{block}{Concepts}
\begin{itemize}
\item<1> Dimension or analysis axis
\item<2> Measure: depends on a subset of dimensions
\item<3> Hierarchies: organization of dimensions in a hierarchical manner 
\end{itemize}
\end{block}
\vspace{10pt}
\begin{columns}
\only<1,2>{
\begin{column}{0.3\textwidth}
\begin{tikzpicture}
\tikzstyle{a}=[
	draw,
	rectangle,
	node distance=0pt,
	inner sep=0pt,
	minimum width=20pt,
	minimum height=20pt
]
\node[
	a
] (cell-1) {};
\node[
	a,
	right=0pt of cell-1
] (cell-2) {};
\node[
	a,
	right=0pt of cell-2
] (cell-3) {};
\node[
	a,
	above=0pt of cell-1
] (cell-4) {};
\node[
	a,
	right=0pt of cell-4
] (cell-5) {};
\node[
	a,
	right=0pt of cell-5
] (cell-6) {};	
\node[
	a,
	above=0pt of cell-4
] (cell-7) {};
\node[
	a,
	right=0pt of cell-7
] (cell-8) {};
\node[
	a,
	right=0pt of cell-8
] (cell-9) {};
\node[
	node distance=0pt,
	inner sep=0pt,
	above=10pt of cell-7,
	minimum width=0pt
] (node-1) {};
\path[-] (cell-7.north west) edge (node-1);
\node[
	node distance=0pt,
	inner sep=0pt,
	above=10pt of cell-9,
	xshift=20pt,
	minimum width=0pt
] (node-2) {};
\path[-] (cell-9.north east) edge (node-2);
\path[-] (node-1) edge (node-2);
\node[
	node distance=0pt,
	inner sep=0pt,
	right=20pt of node-1,
	minimum width=0pt
] (node-3) {};
\path[-] (cell-7.north east) edge (node-3);
\node[
	node distance=0pt,
	inner sep=0pt,
	right=20pt of node-3,
	minimum width=0pt
] (node-4) {};
\path[-] (cell-8.north east) edge (node-4);
\node[
	node distance=0pt,
	inner sep=0pt,
	right=10pt of cell-3.south east,
	yshift=10pt,
	minimum width=0pt
] (node-5) {};
\path[-] (node-5) edge (node-2);
\path[-] (cell-3.south east) edge (node-5);
\node[
	node distance=0pt,
	inner sep=0pt,
	above=20pt of node-5,
	minimum width=0pt
] (node-6) {};
\path[-] (cell-6.south east) edge (node-6);
\node[
	node distance=0pt,
	inner sep=0pt,
	above=20pt of node-6,
	minimum width=0pt
] (node-7) {};
\path[-] (cell-9.south east) edge (node-7);
\node[
	node distance=0pt,
	minimum width=0pt,
	inner sep=0pt,
	right=10pt of node-5,
	yshift=10pt
] (nnode-1) {};
\path[dotted,->] (node-5) edge (nnode-1);
\node[
	node distance=0pt,
	minimum width=0pt,
	inner sep=0pt,
	right=10pt of node-2,
	yshift=10pt
] (nnode-2) {};
\path[dotted,->] (node-2) edge (nnode-2);
\node[
	node distance=0pt,
	inner sep=0pt,
	minimum width=0pt,
	right=10pt of node-1,
	yshift=10pt
] (nnode-3) {};
\path[dotted,->] (node-1) edge (nnode-3);
\node<1>[below=0pt of cell-2] {\color{UniBlue}analysis axis};
\path<1>[very thick,color=UniBlue] (cell-1.south west) edge (cell-3.south east);
\node<2>[a,fill=UniBlue] at (cell-3) {};
\draw<2>[fill=UniBlue] (cell-3.south east) -- (cell-3.north east) -- (node-6.center) -- (node-5.center) -- cycle;
\end{tikzpicture}
\end{column}
\begin{column}{0.7\textwidth}
\only<1>{\underline{Dimensions} on which users base their analyses of data.
\begin{itemize}
\item Numeric values (e.g. Year)
\item String values (e.g. Product, City, \ldots)
\end{itemize}
Dimensions are further described by attributes
}
\only<2>{\underline{Measures} KPIs used by business users to analyze data
}
\end{column}
}
\only<3>{
\begin{column}{\textwidth}
The time hierarchy can be described as
follows:
\begin{align*}
\fbox{Year}\rightarrow\fbox{Quarter}\rightarrow\fbox{Month}\rightarrow\fbox{Week}\rightarrow\fbox{Day}\
\ldots
\end{align*}
and similarly the geographic hierarchy can be described as:
\begin{align*}
\fbox{Region}\rightarrow\fbox{Country}\rightarrow\fbox{State}\rightarrow\fbox{County}\rightarrow\fbox{City}\rightarrow\fbox{Street}\
\ldots
\end{align*}
\end{column}
}
\end{columns}
}









\frame{\frametitle{Multidimensional data schema}
\begin{figure}
\includegraphics[scale=0.4]{img/data-schema}
\caption{Schema of the \emph{eFashion} dataset.}
\end{figure}
}


\frame{
	\frametitle{BI tools}
	\begin{figure}
	\includegraphics[width=0.9\textwidth]{img/bi-tool}
	\caption{BI tool for exploring data}
	\end{figure}
}


\subsection{Problem}
\begin{frame}
	\frametitle{Problem}

\begin{figure}
\begin{tikzpicture}
\tikzstyle{a}=[draw=white,node distance=0pt,inner sep=0pt]
\node[a] (label-1) {\bf user's input};
\node[a,below=2pt of label-1] (label-2) {NL, keywords}; 
\node[a, right=100pt of label-1] (label-3) {\bf results};
\node[a, below=2pt of label-3] (label-4) {database queries};
\node[a,right=10pt of label-1] (fake-node-1) {};
\node[a,left=10pt of label-3] (fake-node-2) {};
\path[->,very thick,bend left] (fake-node-1) edge node[yshift=10pt] {\large \bf ?} (fake-node-2);
%\draw[shade,scale=0.2] \gear{11}{3}{3.35}{10}{2};
%\path[shade,scale=0.2] circle(3);
%\draw[thick,double distance=2pt,fill=white,scale=0.2] circle(2.35);
\end{tikzpicture}
\end{figure}

The problem is stated as:
% finding a mapping $t$ from a question 
%$q$ to a family of results $(r_i)_i$:
	$$t:q\mapsto(r_i)_{i\in I}$$
%The index $i$ corresponds to the \emph{rank} of each result and can be 
%computed by a scoring function.	
i.e.\ mapping a user's input, expressed in NL or in keywords, to an ordered set of results.

\end{frame}
\subsection{Contributions}
\frame{
	\frametitle{Contribution to the state of the art}
	\begin{enumerate}
	  \item a comprehensive framework for Q\&A dedicated to business users 
	        leveraging contextual information to offer more personalized results
	  \item a NL query interface associated to a speech-to-text tool
	  \item a translation approach
	  \begin{itemize}
	    \item that has proven to be valid in at least 3 European languages
	    \item whose graph matching is based on constraint-satisfaction rules
	  \end{itemize} 
	   \item a plugin-based architecture which ensures a high degree of portability
	   \item an overall approach which makes the system quite independent with
	   respect to the domain, according to our evaluation results
	\end{enumerate}
}

\section{State of the art}
\subsection{Main classes of approaches}
\frame{
\frametitle{State of the art}
%History of NL interfaces:
\begin{enumerate}
\item classic translation approach
\begin{enumerate}
\item early years of domain-specific systems~\cite{Green:1961:BAQ:1460690.1460714,Woods:1973:PNL:1499586.1499695}
\item complex question translation in a specific domain~\cite{Popescu:2004:MNL:1220355.1220376,Wang:2007:PPN:1419662.1419706}
\end{enumerate}
\item iterative approach
\begin{enumerate}
\item feedback-driven approaches~\cite{Li:2005:NIN:1066157.1066281,Cimiano:2007:PNL:1216295.1216330}
\item learning-based approaches~\cite{DBLP:conf/uai/ZettlemoyerC05,Miller:1996:FSA:981863.981871,Thompson:2003:AWM:1622420.1622421}
\item schema-unaware approaches~\cite{DBLP:conf/esws/LopezMU06,FerrucciBCFGKLMNPSW10}
\end{enumerate}
\end{enumerate}




%Main dimensions that we consider:
\begin{table}
\rowcolors{2}{UniBlue}{SandyBrown}
\begin{tabular}{|l|l|}\hline
\rowcolor{black}
\multicolumn{1}{|c|}{\bf\color{white} Dimensions} & \multicolumn{1}{c|}{\bf\color{white} Example}\\\hline
Data sources & Relational database, ontology, \ldots\\\hline
Users' questions & Keywords, NL\\\hline
Internal representation & Parse tree, triples, \ldots\\\hline
Database queries & SQL, MDX, \ldots\\\hline
Domain-independence & operate across domains\\\hline
Portability & configuration, authoring tools, ML\\\hline
Evaluation metrics & Linguistic vs. logical coverage\\\hline
\end{tabular}
\caption{Main dimensions in Q\&A systems}
\end{table}


}

\subsection{Big picture}
\frame{
\frametitle{Big picture}
\begin{figure}
\begin{tikzpicture}[transform shape]
%\tikzset{VertexStyle/.append style = {minimum size = 3pt}}
\tikzstyle{circ}=[draw,circle,inner sep=2pt,color=UniBlue]
\node at (0,0) [circ](1){1};
\node at (2,0) [circ](2){2};
\node at (4,0) [circ](3){3};
\node at (6,0) [circ](4){4};
\node at (8,0) [circ](5){5};
\node at (0,-1) [draw=none,fill=none,label={[label distance=3pt]{NL query}}]
{}; 
\node at (2,0) [draw=none,fill=none,label={[label
distance=3pt]{intermediate query}}] {}; 
\node at (4,-1) [draw=none,fill=none,label={[label
distance=3pt]{query graph}}] {};
\node at (6,0) [draw=none,fill=none,label={[label
distance=3pt]{logic query graph}}] {};
\node at (8,-1) [draw=none,fill=none,label={[label
distance=3pt]{database query}}] {};
\draw[->](1) edge[bend left] (2);
\draw[->](2) edge[bend left] (3);
\draw[->](3) edge[bend left] (4);
\draw[->](4) edge[bend left] (5);
\draw[<->](1) edge[bend right] (3);
\draw[->](4) edge[bend left] (3);
\end{tikzpicture}
\caption{Big picture of NL interfaces}
\end{figure}

Steps involved in the translation of questions into database queries:
\begin{tabular}{cl}
\circled{1} & user's query\\
\circled{2} & intermediate representation\\
\circled{3} & parse graph of the query\\
\circled{4} & graph with semantic annotations\\
\circled{5} & target database query\\
\end{tabular}

}




\section{Answering system}
\subsection{Global architecture}
\frame{
\begin{columns}[T]
\begin{column}{.6\textwidth}
\includegraphics[scale=0.57,trim=0pt 0pt 0pt 10pt]{img/archi}
%\begin{tikzpicture}
%\node[anchor=south west,inner sep=0] at (0,0) {\includegraphics[scale=0.57, trim=0pt 0pt 0pt 10pt]{img/archi}};
%\draw<1>[red,ultra thick,rounded corners] (1.6,1) rectangle (\textheight-1cm,5);
%\draw[UniBlue,thick,rounded corners] (,10) rectangle (7.5,4.9);
%\end{tikzpicture}
\end{column}
\begin{column}{.4\textwidth}

\begin{block}{Architecture}
\begin{itemize}
\item search front-ends
\item query/answer graphs
\item user context
\item background knowledge
\item search engines
\end{itemize}
\end{block}
\begin{block}{Implementation details}
\begin{itemize}
\item asynchronous services
\item parallel algorithms
\item bound to SAP tools
\begin{itemize}
\item NLP tasks
\item database engine
\end{itemize}
\end{itemize}
\end{block}

\end{column}
\end{columns}

}

%\subsection{Q\&A component}
%\frame{
%\frametitle{Architecture of the answering system}
%\begin{figure}
%\includegraphics[scale=0.7,trim=15pt 0pt 0pt 20pt]{img/archi-new-3}
%\caption{Architecture of the answering system}
%\end{figure}
%\begin{columns}[T]
%\begin{column}{0.33\textwidth}
%\begin{block}{User context}
%\end{block}
%\end{column}
%\begin{column}{0.33\textwidth}
%\begin{block}{b}
%\end{block}
%\end{column}
%\begin{column}{0.33\textwidth}
%\begin{block}{c}
%\end{block}
%\end{column}
%\end{columns}
%}

\section{Linguistic patterns}
\subsection{Question parsing}
\frame{
	\frametitle{Question parsing}
\begin{columns}
\begin{column}{0.55\textwidth}
%\begin{block}{Parsing}
\begin{figure}
 \includegraphics[scale=0.9,trim=12pt 0pt 0pt 0pt]{img/running-example}
\caption{Question parsing}
\end{figure}
%\end{block}
\end{column}
\begin{column}{.5\textwidth}
%\begin{block}{Annotations}
\begin{itemize}
\item \underline{user profile:} `my city' as `Palo Alto' 
\item \underline{data schema:} `customers' as a dimension 
\item \underline{custom linguistic rules:}
\begin{itemize}
	\item `top-$k$' rule
	\item `basic query' rule
	\item domain-specific rules
\end{itemize}
\end{itemize}
%\end{block}
\end{column}
\end{columns}

}

\subsection{Parse graph}
\frame[plain]{
\begin{columns}
\begin{column}{0.35\textwidth}
	\begin{figure}
	\includegraphics[scale=0.6,trim=30pt 0pt 0pt 45pt]{img/parse-graph}
	\end{figure}
\end{column}
\begin{column}{0.65\textwidth}
\begin{tabular}{cl}
\yellownode{Q} & root node holds $n$ annotations\\
\lightbluenode{$x$} & node connecting to the user profile\\
\lightrednode{$x$} & node connecting to the data schema\\ 
\multirow{2}{*}{\lightgreennode{$x$}} & node connecting to domain-specific\\
 & rules (e.g. `middle-aged')\\
\annotationnode{label} & annotation node
\end{tabular}

\end{column}
\end{columns}
}

\subsection{Pattern}
\frame{
\frametitle{Pattern}
	\begin{figure}
	\includegraphics[width=0.9\textwidth]{img/pattern}
	\end{figure}
}

\begin{frame}[fragile]
\frametitle{Pattern}
Mapping to the internal representation: SparQL graph pattern.	
\begin{block}{Relational constraints}
\begin{itemize}
\item markers represent URIs, variables or literals
\item optional items with \verb|OPTIONAL| statement
\item annotation nodes \lightyellownode{$x$} bound to metadata nodes \lightrednode{$x$} associated to literals \darkrednode{} to be reused in the database template
\end{itemize}
\end{block}


\begin{block}{Property constraints}
Practical purpose: ensures that no duplicate queries are generated
\begin{itemize}
\item matched text; annotations' offset and length 
\end{itemize}
\end{block}

\begin{block}{Additional variables}
\begin{itemize}
\item limit the size of the query results
\item define analysis types (choose better charts) 
\end{itemize}
\end{block}

\end{frame}

\section{Query model}

\frame{
\frametitle{Conceptual model of queries}
\begin{itemize}
\item<1-> dimensions
\item<1-> measures
\item<2-> filters
\item<3-> truncation
\item<3-> ordering
\end{itemize}
\underline{Example:}\\
\vspace{5pt}
\begin{center}
\only<1>{$\underbrace{\textnormal{Sales revenue}}_{\textnormal{measure}}$ per $\underbrace{\textnormal{year}}_{\textnormal{dimension}}$}
\only<2>{$\underbrace{\textnormal{Sales revenue}}_{\textnormal{measure}}$ per $\underbrace{\textnormal{year}}_{\textnormal{dimension}}$ in $\underbrace{\textnormal{New York}}_{\textnormal{filter}}$
}
\only<3>{Top $\underbrace{\textnormal{5}}_{\textnormal{truncation}}$ $\underbrace{\textnormal{stores}}_{\textnormal{dimension}}$ in $\underbrace{\textnormal{2011}}_{\textnormal{filter}}$\\
\vspace{10pt}
The measure is missing 
}
\end{center}
}



\begin{frame}[fragile]
\frametitle{Query model - example}
For instance, the following conceptual query 
\begin{equation}
\footnotesize
Q=\left[\begin{array}{lcl}
\textnormal{dimensions} & = & \{[\textnormal{Year}]\}\\
\textnormal{measures} & = & \{(\textnormal{Sales revenue})\}\\
\textnormal{filters} & = & \{[\textnormal{City}]\in \{\textnormal{`New York'},\textnormal{`Boston'}\}\}\\
\textnormal{truncation} & = & \emptyset\\
\textnormal{ordering} & = & [([\textnormal{Year}],(\textnormal{Sales
revenue}).\uparrow)] \end{array}\right]
\label{eq:modeling-conceptual-query-1}
\end{equation}
%and automatically translated in a database query language (e.g. SQL, MDX, \ldots) 
%in a subsequent step, using the BusinessObjects Semantic Layer. 
would be translated to the following SQL query:
\begin{multicols}{2}
\begin{lstlisting}[language=SQL,basicstyle=\ttfamily\fontsize{6}{7}\selectfont,numbers=left,numberstyle=\tiny]
SELECT
 sum(Table__2."AMOUNT_SOLD"), 
  Table__7."YR"
FROM
 "EFASHION"."CALENDAR_YEAR_LOOKUP" 
  Table__7 
INNER JOIN 
 "EFASHION"."SHOP_FACTS"  Table__2 
 ON (
  Table__2."WEEK_ID"=
  Table__7."WEEK_ID"
 )
INNER JOIN 
 "EFASHION"."OUTLET_LOOKUP"  Table__1 ON (
  Table__1."SHOP_ID"=Table__2."SHOP_ID"
 )
WHERE
 Table__1."CITY" IN ('New York', 'Boston')
GROUP BY
 Table__7."YR"
ORDER BY 2
\end{lstlisting}
\end{multicols}
\end{frame}



\subsection{Ranking function}
\frame{
\frametitle{How to rank results (1)}
Final ranking function is a combination of several metrics:
\begin{block}{Confidence}
\begin{itemize}
\item each annotation is given a confidence from the NER
\item each annotation type is given a weighting score 
\end{itemize}
\end{block}
\vspace{20pt}
\begin{columns}
\begin{column}{0.5\textwidth}
\underline{Example:}
``$\underbrace{\textnormal{Revenue}}_1$ in $\underbrace{\textnormal{my }\underbrace{\textnormal{city}}_2}_3$''
\begin{itemize}
\item $c_a<c_b$
\item $c_d<c_c$
\item $c_2<c_3$ 
\end{itemize}
\end{column}
\begin{column}{0.5\textwidth}
\begin{tabular}{c|l}
\multirow{2}{*}{1 $\rightarrow c_1$} & $\{\textnormal{Sales revenue}\rightarrow c_a,$\\
 & $\textnormal{Revenue}\rightarrow c_b\}$\\\hline
\multirow{2}{*}{2 $\rightarrow c_2$} & $\{\textnormal{City}\rightarrow c_c,$\\
 & $\textnormal{`New York City'}\rightarrow c_d\}$\\\hline
3 $\rightarrow c_3$ & $\{\textnormal{`Palo Alto'}\rightarrow c_e\}$
\end{tabular}
\end{column}
\end{columns}
}

\frame{
\frametitle{How to rank results (2)}
Additional metrics:
\begin{block}{Selectivity}
\begin{itemize}
\item promote specific patterns
\item considers the number of queries that have been generated
\end{itemize}
\end{block}
\begin{block}{Complexity}
\begin{itemize}
\item based on the number of \emph{entities} in the pattern
\end{itemize}
\end{block}
}


%\frame{
%\frametitle{How to rank results (2)}
%\begin{block}{Selectivity}
%\begin{itemize}
%\item promote specific patterns
%\end{itemize}
%\end{block}
%\begin{columns}[T]
%\begin{column}{0.6\textwidth}
%\underline{Example:}\\
%\includegraphics[scale=0.9,trim=12pt 0pt 0pt 0pt]{img/running-example}
%\end{column}
%\begin{column}{0.4\textwidth}
%basic query vs. top-$k$
%\end{column}
%\end{columns}
%}
%
%
%
%\frame{
%\frametitle{How to rank results (3)}
%\begin{block}{Complexity}
%\begin{itemize}
%\item number of \emph{entities} in the generated query
%\item entity types are given a weighting score
%\end{itemize}
%\end{block}
%\underline{Example:}\\
%}




\subsection{Personalization}




\section{Evaluation}
\subsection{Evaluation metrics}
\frame{
	\frametitle{Classic evaluation metrics}
	In IR, most common evaluation metrics are:
	\begin{itemize}
	  \item precision: $$p=\frac{|\{\textnormal{retrieved documents}\}\cap\{\textnormal{relevant
documents}\}|}{|\{\textnormal{retrieved documents}\}|}$$
\item recall: $$r=\frac{|\{\textnormal{retrieved documents}\}\cap\{\textnormal{relevant
documents}\}|}{|\{\textnormal{total relevant documents}\}|}$$
\item precision at $k$: $$p@k=\frac{\min(k,|\{\textnormal{retrieved documents}\}\cap\{\textnormal{relevant
documents}\}|)}{\min(|\{\textnormal{retrieved documents}\}|,k)}$$
	\end{itemize}
}


\subsection{Evaluation corpus}
\frame{
\frametitle{ManyEyes platform}
\begin{figure}
\includegraphics[width=\textwidth]{img/manyeyes}
\end{figure}
}

\frame{
	\frametitle{Test corpus}
	
	\begin{table}
	\tiny
	\begin{tabular}{lllll}\hline
\multicolumn{1}{c}{\textbf{Query}} & 
\multicolumn{1}{c}{\textbf{Updated query}} & &
\multicolumn{1}{c}{\textbf{Entities}} & 
\multicolumn{1}{c}{\textbf{Comment}}\\\hline\hline
State Population & & & $\{\textnormal{State, Population}$ &
\\
Change & & & $\textnormal{change}\}$ & \\\hline
Home Ownership & Owner-occupied & &
$\{\textnormal{State, Owner-occupied}$ & \multirow{2}{*}{home $\sim$ dwellings}\\
by State & dwellings by state & & $\textnormal{dwellings}\}$ & \\\hline
 USA States & & & \multirow{2}{*}{$\{\textnormal{State}\}$} & \\
information & & & & \\\hline
 Generation Y in  & population by year & &
 \multirow{2}{*}{$\{\textnormal{Year, Population}\}$} &
 generation $\sim$ demographic \\
 2010 (Ages 10-32) & for ages 10-32 &  & & information\\\hline
 And the whitest name & \multirow{2}{*}{names white percent} & &
 \multirow{2}{*}{$\{\textnormal{Name, White percent}\}$} &
 \multirow{2}{*}{whitest $\sim$ white} \\
 in America is & & & & \\\hline
 40+ Population & & & $\{\textnormal{Age,
 Population}$ & \\
 Projections by Age & & & $\textnormal{projection}\}$ & \\\hline
 Average Time Spent & & & $\{\textnormal{State}, \textnormal{Median travel}$ &
 \\
Commuting by State & & & $\textnormal{time to work}\}$ & \\\hline
\end{tabular}
\caption{Some queries from the test corpus}
\end{table}
}

\subsection{Results}
\frame{
\frametitle{Evaluation results}
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{figure}
\begin{tikzpicture}
\begin{axis}[width=\linewidth,xlabel=$k$,ylabel={success@$k$},grid=both,
legend entries={\footnotesize \textsc{Quasl},\footnotesize\textsc{Quasl} with updated queries,\footnotesize WolframAlpha,\footnotesize WolframAlpha with updated queries},
legend style={cells={anchor=center, fill},
nodes={inner sep=1,below=-5pt},at={(0.5,-1)},anchor=south}]


\addplot[smooth,mark=*,color=black]
plot coordinates { (1,0.636363636) (2,0.727272727) (3,0.757575758) (4,0.787878788)
	(5,0.787878788)
	(6,0.787878788)
	(7,0.787878788)
	(8,0.848484848)
	(9,0.878787879)
	(10,0.909090909)
};
\addplot[smooth,mark=x,color=black] plot coordinates {
	(1,0.787878788)
	(2,0.878787879)
	(3,0.878787879)
	(4,0.909090909)
	(5,0.939393939)
	(6,0.939393939)
	(7,0.939393939)
	(8,0.939393939)
	(9,0.939393939)
	(10,0.939393939)
};
\addplot[smooth,mark=*,color=black,style=dotted] plot coordinates {
	(1,0.151515152)
	(2,0.151515152)
	(3,0.181818182)
	(4,0.242424242)
	(5,0.303030303)
	(6,0.303030303)
	(7,0.303030303)
	(8,0.303030303)
	(9,0.303030303)
	(10,0.303030303)
};
\addplot[smooth,mark=x,color=black,style=dotted] plot coordinates {
	(1,0.212121212)
	(2,0.212121212)
	(3,0.242424242)
	(4,0.303030303)
	(5,0.363636364)
	(6,0.393939394)
	(7,0.393939394)
	(8,0.393939394)
	(9,0.393939394)
	(10,0.393939394)
};
\end{axis}
\end{tikzpicture}
\label{fig:success-k-1}
\end{figure}
\end{column}
% Right-hand column: second success@k plot (four series, legend entries
% disabled).  The plot now sits in a single figure environment; the redundant
% figure nested directly inside another figure has been removed.
\begin{column}{0.5\textwidth}
\begin{figure}
%\includegraphics[width=0.7\textwidth]{img/eval-chart}
\begin{tikzpicture}
\begin{axis}[width=\linewidth,xlabel=$k$,ylabel={success@$k$},grid=both,
%legend entries={\footnotesize\textsc{Quasl},\footnotesize\textsc{Quasl} with updated queries, \footnotesize WolframAlpha,\footnotesize WolframAlpha with updated queries},
legend style={cells={anchor=center, fill},nodes={inner
sep=1,below=-5pt},at={(0.5,-0.9)},anchor=south}] 

% Series 1: solid line, filled-circle marks.
\addplot[smooth,mark=*,color=black] 
plot coordinates { (1,0.677419355)
	(2,0.774193548)
	(3,0.806451613)
	(4,0.838709677)
	(5,0.838709677)
	(6,0.838709677)
	(7,0.838709677)
	(8,0.903225806)
	(9,0.935483871)
	(10,0.967741935)
};
% Series 2: solid line, cross marks.
\addplot[smooth,mark=x,color=black] plot coordinates {
	(1,0.838709677)
	(2,0.935483871)
	(3,0.935483871)
	(4,0.967741935)
	(5,1)
	(6,1)
	(7,1)
	(8,1)
	(9,1)
	(10,1)
};
% Series 3: dotted line, filled-circle marks.
\addplot[smooth,mark=*,color=black,style=dotted] plot coordinates {
	(1,0.384615385)
	(2,0.384615385)
	(3,0.461538462)
	(4,0.615384615)
	(5,0.769230769)
	(6,0.769230769)
	(7,0.769230769)
	(8,0.769230769)
	(9,0.769230769)
	(10,0.769230769)
};
% Series 4: dotted line, cross marks.
\addplot[smooth,mark=x,color=black,style=dotted] plot coordinates {
	(1,0.538461538)
	(2,0.538461538)
	(3,0.615384615)
	(4,0.769230769)
	(5,0.923076923)
	(6,1)
	(7,1)
	(8,1)
	(9,1)
	(10,1)
};
\end{axis}
\end{tikzpicture}
%\caption{Variant of success of answering goldstandard queries compared to
%WolframAlpha\texttrademark{}}
%\label{fig:success-k-2}
\end{figure}
\end{column}
\end{columns}
}

\section{Conclusion}
\frame{
\frametitle{Conclusion}
\begin{itemize}
\item Q\&A framework 
\begin{itemize}
\item dedicated to non-expert users
\item portable, low configuration effort
\item implemented in three languages (English, German, French)
\item constraint-based programming approach (patterns)
\item can interface additional query languages 
\end{itemize}
\item evaluation
\begin{itemize}
\item comparison to WolframAlpha on the Census dataset
\item comparable results, but minimal configuration in our case
\end{itemize} 
\end{itemize}
}


\frame{
\frametitle{Publications \& Patents}

Publications:
\begin{itemize}
\item A Natural Language Interface for Data Warehouse Question Answering, NLDB, 2011
\item Structured data-based Q\&A system using surface patterns, FQAS, 2011 
\item Semantics and Usage Statistics for Multi-Dimensional Query Expansion\footnote{In collaboration with Rapha\"el Thollot}, DASFAA, 2012
\end{itemize}


Patents:
\begin{itemize}
\item Context-aware Question Answering System, 2011P00569US (US)
\item Question Answering Framework for Structured Query Languages, 000005-030900US (US)
\end{itemize}
}

\frame{
\frametitle{Future work}
Research directions: 
\begin{itemize}
\item machine learning techniques (pattern acquisition)
\item query personalization
\item improve the linguistic coverage
\end{itemize}
}

\frame[allowframebreaks]{
\frametitle{Bibliography}
\bibliographystyle{amsalpha}
\bibliography{these} 
}

\frame{
\frametitle{Questions}
{\Huge \color{UniBlue}\bf Thank you!}\\
\vspace{15pt}
\hfill Any questions?
}

\appendix
\backupbegin
\frame{
\frametitle{Markov model}
\begin{columns}[T]
\begin{column}{0.5\textwidth}
\begin{figure}
\includegraphics[scale=0.6]{img/modeling-markov}
\caption{States of the Markov model are clusters of queries}
\end{figure}
\end{column}
\begin{column}{0.58\textwidth}
\begin{block}{Query logs}
$H=\langle\textnormal{intention}, \textnormal{context}, \textnormal{preferences}\rangle$
\end{block}
\begin{block}{Similarity metrics}
\begin{itemize}
\item $d_{MDX}(q_1,q_2)=\gamma\times d_{dim}(q_1,q_2)+(1-\gamma)\times d_h(q_1,q_2)$
\item $d_{MLP}(q_1,q_2)=\delta\times d_{MDX}(q_1,q_2)+(1-\delta)\times d_{pref}(q_1,q_2)$
\end{itemize}
\end{block}
\end{column}
\end{columns}
\hfill {\small \it In collaboration with Yves Vanrompay}
}



\frame{
\frametitle{Query prediction (future work)}
Architecture of a query prediction system, to be evaluated:
\begin{figure}
\includegraphics[width=\textwidth,trim=0pt 20pt 0pt 20pt]{img/architecture}
\end{figure}
\hfill{\small \it In collaboration with Yves Vanrompay}
}


\backupend


\end{document}




